** Organize NLSY data mother-child - main data


* ATTENTION: THIS DO FILE WILL TAKE A WHILE TO RUN

* Important: don't change, rename or order raw file variables, 
* the order is important for this do file to work properly



********************************************************************************
** CPI DATA  
********************************************************************************
* Read the BLS annual CPI table; the first row of the cell range holds the
* column names, so the data arrive as the variables Year and Annual
import excel "$rawpath\SeriesReport-20200703100413_ac877d.xlsx", sheet("BLS Data Series") cellrange(A12:B54) firstrow clear

* Keep both columns as matrix cpi (column 1 = Year, column 2 = Annual) so the
* index can be looked up by row number later in this do file
mkmat Year Annual, matrix(cpi)

* Annual CPI in the last row of the table, used below as the reference price level
* NOTE(review): the macro name implies the last row is 2019 - confirm against the sheet
local cpi2019 = Annual[_N]


********************************************************************************
** CHILD DATA
********************************************************************************

** Open variables children
use "$rawpath\000_cnlsy_child_controlvariables_raw.dta", clear


* KEEP only children born after the survey started, otherwise no maternal employment information
* After 1994 several variables are only available every 2 years
keep if dob_year>=1979 & dob_year<=2008
* For children born after 2010 we probably won't have data on the outcomes

** Race dummies
qui gen black = (race==2) 
qui gen hispanic = (race==1)

** Sex dummy 
* male = 1 when sex==1, 0 for any other positive code, and missing when sex is
* non-positive or missing. The previous two-step version created the missing
* values and then overwrote them with an unconditional replace, so invalid sex
* codes ended up classified as female.
qui gen male = (sex==1) if sex>0 & !missing(sex)

** Used in replication but not here
drop nh0_2????_???? nh3_5????_???? nh6_11????_???? nh12_17????_????

drop csasage2002_2002 nhchld1989_1989 nhchld1990_1990


* Mother's spouse present in year of birth  
********************************************************************************
* spouse_atbirth = 1 when spouseYYYY_YYYY==1, 0 for any other non-negative code,
* and missing when neither the year of birth nor the year before has a valid
* (non-negative, non-missing) answer.
*
* The variable starts as missing so that the year-before-birth fallback can
* actually fire: when it started at 0, children whose birth-year variable does
* not exist never satisfied the "==." fallback condition, and a negative code in
* the previous year could wipe out a valid birth-year answer.

gen spouse_atbirth = .

local N = _N
forvalues i=1(1)`N' {

	* Each child DOB information
	local year_birth = dob_year[`i']
	local year_prev = `year_birth' - 1

	* Year of birth
	* capture: there is no variable for 1978 and for odd years after 1994
	cap replace spouse_atbirth = (spouse`year_birth'_`year_birth'==1) ///
		if spouse`year_birth'_`year_birth'>=0 & !missing(spouse`year_birth'_`year_birth') in `i'

	* Year before birth, used only when the year of birth gave no valid answer
	* (children born in odd years after 1994, or missing in the year of birth)
	cap replace spouse_atbirth = (spouse`year_prev'_`year_prev'==1) ///
		if missing(spouse_atbirth) & spouse`year_prev'_`year_prev'>=0 & !missing(spouse`year_prev'_`year_prev') in `i'

}

drop spouse????_????  

* Mother's spouse highest grade completed up to date of birth
********************************************************************************

* Highest grade completed by the spouse: the most recent positive, non-missing
* report found when walking backwards from the child's year of birth to 1979
qui gen hgc_birth_spouse_mom = .

local n_obs = _N
forvalues row = 1/`n_obs' {

	* Year of birth of the child in this row
	local birth_yr = dob_year[`row']

	* Walk backwards; once a value is stored the "==." condition blocks older years.
	* capture: some years have no hgcsps variable at all
	forvalues yr = `birth_yr'(-1)1979 {
		cap replace hgc_birth_spouse_mom = hgcsps`yr'_`yr' ///
			if (hgcsps`yr'_`yr' > 0 & hgcsps`yr'_`yr' != . & hgc_birth_spouse_mom==.) in `row'
	}

}

* Code 95 is set to missing
recode hgc_birth_spouse_mom (95 = .)

drop hgcsps????_????
*/



*******************************************************************************
* MOTHERS INFORMATION
*******************************************************************************

* Attach the mother's basic controls to every child (several children can share a mother)
merge m:1 mom_pid using "$rawpath\000_NLSY79_basiccontrols_mother.dta", keep(master matched) nogen

drop hhi_final_relcode_head*



*Mother's Race
********************************************************************************

qui gen black_mom = (race_mom == 2)

qui gen hispanic_mom = (race_mom == 1)


*Mother's Age at First Marriage
********************************************************************************
* Negative codes are treated as missing
qui gen age_mar_mom = age1m_xrnd 
qui replace age_mar_mom = . if age1m_xrnd  < 0

* Married before age 20. The full variable name is spelled out: the previous
* "age_mar" only resolved through variable abbreviation and would break with
* -set varabbrev off- or if another age_mar* variable were added.
* Mothers with missing age at first marriage are coded 0.
qui gen teen_mar_mom = (age_mar_mom < 20 & age_mar_mom != .)

drop age1m_xrnd


*Mother's AFQT
********************************************************************************
rename afqt_3_1981 afqt_mom
qui replace afqt_mom = . if afqt_mom < 0
qui replace afqt_mom = afqt_mom/1000 /*in percentile, from 0 to 100 */


*Mother's schooling in year of birth
********************************************************************************
* Highest grade completed at birth: the most recent positive report found when
* walking backwards from the child's year of birth to 1979
qui gen hgc_birth_mom = .

local n_obs = _N
forvalues row = 1/`n_obs' {

	* Year of birth of the child in this row
	local birth_yr = dob_year[`row']

	* capture: no variable for 1978 and odd years after 1994
	forvalues yr = `birth_yr'(-1)1979 {

		* Two-digit year used in the raw variable name (hgcrevYY_YYYY)
		local yy = substr("`yr'", 3, 2)
		cap replace hgc_birth_mom = hgcrev`yy'_`yr' ///
			if hgcrev`yy'_`yr' > 0 & hgc_birth_mom == . in `row'
	}
}



* Indicators built from hgc_birth_mom; each is missing when the grade is missing

* 12 or more grades completed
qui gen hs_birth_mom = (hgc_birth_mom >= 12) if hgc_birth_mom != .

* More than 12 grades completed
qui gen col_birth_mom = (hgc_birth_mom > 12) if hgc_birth_mom != .

* 16 or more grades completed
qui gen col_com_birth_mom = (hgc_birth_mom >= 16) if hgc_birth_mom != .



* Highest grade completed, ever reported: the most recent positive, non-missing
* report, scanning the survey rounds from 2016 back to 1979
qui gen hgc_ever_mom = .
local years_reverse 2014 2012 2010 2008 2006 2004 2002 2000 1998 1996 1994 1993 1992 1991 1990 1989 1988 1987 1986 1985 1984 1983 1982 1981 1980 1979
foreach yr in 2016 `years_reverse' {
	* Two-digit year used in the raw variable name (hgcrevYY_YYYY)
	local yy = substr("`yr'", 3, 2)
	qui replace hgc_ever_mom = hgcrev`yy'_`yr' ///
		if (hgcrev`yy'_`yr' > 0 & hgcrev`yy'_`yr' != .) & hgc_ever_mom == .
}

* Indicators built from hgc_ever_mom; each is missing when the grade is missing

* 12 or more grades completed
qui gen hs_ever_mom = (hgc_ever_mom >= 12) if hgc_ever_mom != .

* 16 or more grades completed
qui gen col_ever_mom = (hgc_ever_mom >= 16) if hgc_ever_mom != .

drop hgcrev??_????


*Mother's marriage in year of birth
********************************************************************************
* mar_status_birth = 1 when marstat_key_YYYY==1, 0 for any other non-negative
* code, and missing when neither the year of birth nor the year before has a
* valid (non-negative, non-missing) answer.
*
* The variable starts as missing so that the year-before-birth fallback can
* actually fire: when it started at 0, children whose birth-year variable does
* not exist never satisfied the "==." fallback condition, and a negative code in
* the previous year could wipe out a valid birth-year answer.

gen mar_status_birth = .

local N = _N
forvalues i=1(1)`N' {

	* Each child DOB information
	local year_birth = dob_year[`i']
	local year_prev = `year_birth' - 1

	* Year of birth
	* capture: there is no variable for 1978 and for odd years after 1994
	cap replace mar_status_birth = (marstat_key_`year_birth'==1) ///
		if marstat_key_`year_birth'>=0 & !missing(marstat_key_`year_birth') in `i'

	* Year before birth, used only when the year of birth gave no valid answer
	* (children born in odd years after 1994, or missing in the year of birth)
	cap replace mar_status_birth = (marstat_key_`year_prev'==1) ///
		if missing(mar_status_birth) & marstat_key_`year_prev'>=0 & !missing(marstat_key_`year_prev') in `i'

}

drop marstat_*


*Mother's poverty status year of birth
********************************************************************************
* pov_atbirth_mom = 1 when povstatus_YYYY==1, 0 for any other non-negative code,
* and missing when neither the year of birth nor the year before has a valid
* (non-negative, non-missing) answer.
*
* The variable starts as missing so that the year-before-birth fallback can
* actually fire: when it started at 0, children whose birth-year variable does
* not exist never satisfied the "==." fallback condition, and a negative code in
* the previous year could wipe out a valid birth-year answer.

gen pov_atbirth_mom = .

local N = _N
forvalues i=1(1)`N' {

	* Each child DOB information
	local year_birth = dob_year[`i']
	local year_prev = `year_birth' - 1

	* Year of birth
	* capture: there is no variable for 1978 and for odd years after 1994
	cap replace pov_atbirth_mom = (povstatus_`year_birth'==1) ///
		if povstatus_`year_birth'>=0 & !missing(povstatus_`year_birth') in `i'

	* Year before birth, used only when the year of birth gave no valid answer
	* (children born in odd years after 1994, or missing in the year of birth)
	cap replace pov_atbirth_mom = (povstatus_`year_prev'==1) ///
		if missing(pov_atbirth_mom) & povstatus_`year_prev'>=0 & !missing(povstatus_`year_prev') in `i'

}

drop povstatus_*


* Mother's region of residence in the year of birth
********************************************************************************
* region_mom holds the region code of the year of birth and falls back to the
* code of the year before birth when the year of birth has no valid
* (non-negative, non-missing) value. It is missing when neither year is valid.
*
* Fixes relative to the earlier version of this section:
*  - the fallback copied the literal 1 (and only when the previous year's region
*    was 1) instead of the previous year's region code;
*  - the variable started at 0, so the "==." fallback never fired for children
*    whose birth-year variable does not exist;
*  - a negative code in the previous year could wipe out a valid birth-year value.

gen region_mom = .

local N = _N
forvalues i=1(1)`N' {

	* Each child DOB information
	local year_birth = dob_year[`i']
	local year_prev = `year_birth' - 1

	* Year of birth
	* capture: there is no variable for 1978 and for odd years after 1994
	cap replace region_mom = region_`year_birth' ///
		if region_`year_birth'>=0 & !missing(region_`year_birth') in `i'

	* Year before birth, used only when the year of birth gave no valid value
	cap replace region_mom = region_`year_prev' ///
		if missing(region_mom) & region_`year_prev'>=0 & !missing(region_`year_prev') in `i'

}

drop region_???? 
 
* Mother's average wage in the child's first 3 years
********************************************************************************

* Note: wage and hours worked variables are about income from work in the PAST YEAR
* Not sure if child's first three years includes the birth year or not

* Strip the "_trunc" and "_revised" tags so every round has the same variable name
desc q13_5_* q13_18_*, varlist
foreach old in `r(varlist)' {
	local clean = usubinstr("`old'", "_trunc", "", .)
	local clean = usubinstr("`clean'", "_revised", "", .)
	rename `old' `clean'
}

local years "2016 2014 2012 2010 2008 2006 2004 2002 2000 1998 1996 1994 1993 1992 1991 1990 1989 1988 1987 1986 1985 1984 1983 1982 1981 1980 1979"

* Hourly wage per survey round: q13_5 divided by hrswk_pcy,
* after setting negative codes to missing; zero hours gives a wage of 0
foreach yr of local years {

	replace q13_5_`yr' = . if q13_5_`yr'<0
	replace hrswk_pcy_`yr' = . if hrswk_pcy_`yr'<0
	gen wage_hour_`yr' = q13_5_`yr'/hrswk_pcy_`yr'
	replace wage_hour_`yr' = 0 if hrswk_pcy_`yr'==0
}

* Containers: year0..year3 use the surveys 1..4 years after the year of birth
forvalues k = 0/3 {
	qui gen wage_hour_year`k'_mom = .
}

* Uses the survey of the year of birth itself
qui gen wage_hour_beforebirth_mom = . 

local n_obs = _N
forvalues row = 1/`n_obs' {

	* Year of birth of the child in this row
	local birth_yr = dob_year[`row']

	* Row of matrix cpi paired with the survey of the year of birth;
	* values are rescaled by cpi2019 / cpi[row, 2]
	local cpi_row = `birth_yr' - 1978

	* capture: there is no survey in odd years after 1994
	cap replace wage_hour_beforebirth_mom = wage_hour_`birth_yr'*(`cpi2019'/cpi[`cpi_row',2]) in `row'

	forvalues k = 0/3 {
		local svy_yr = `birth_yr' + `k' + 1
		local svy_row = `cpi_row' + `k' + 1
		cap replace wage_hour_year`k'_mom = wage_hour_`svy_yr'*(`cpi2019'/cpi[`svy_row',2]) in `row'
	}
}

drop q13_5_* hrswk_pcy_* wage_hour_????


* Mother's SPOUSE average wage in the child's year of birth
********************************************************************************

* Note: variable is about income from work in the PAST YEAR
*
* inc_spouse_mom is q13_18 from the survey one year after birth, rescaled with
* cpi2019 / cpi[row, 2]. When that survey has no valid (non-negative,
* non-missing) value, the survey of the year of birth is used instead.
*
* Fixes relative to the earlier version of this section:
*  - the fallback deflated the birth-year survey with the CPI row of the
*    following survey (year_cpi + 1); everywhere else in this do file the
*    survey of year Y is paired with row Y - 1978, so the fallback now uses
*    row year_cpi;
*  - a negative code in the fallback survey could wipe out a valid value
*    already taken from the first-choice survey.

qui gen inc_spouse_mom = .

local N = _N
forvalues i=1(1)`N' { 

	* Each child DOB information
	local year_birth = dob_year[`i']

	* Row of matrix cpi paired with the survey of the year of birth
	local year_cpi = `year_birth' - 1978 

	* First choice: survey one year after birth
	* capture: there is no survey in odd years after 1994
	local year1 = `year_birth' + 1
	cap replace inc_spouse_mom = q13_18_`year1'*(`cpi2019'/cpi[`year_cpi'+1,2]) ///
		if q13_18_`year1'>=0 & !missing(q13_18_`year1') in `i'

	* Fallback: survey of the year of birth, only when nothing valid was found above
	cap replace inc_spouse_mom = q13_18_`year_birth'*(`cpi2019'/cpi[`year_cpi',2]) ///
		if missing(inc_spouse_mom) & q13_18_`year_birth'>=0 & !missing(q13_18_`year_birth') in `i'

} 

drop q13_18_????


* Mother's HH income around the child's year of birth
********************************************************************************

* Note: variable is about income from work in the PAST YEAR

* before1..before3 use the surveys of the year of birth, one year and two years earlier;
* year0..year3 use the surveys 1..4 years after the year of birth
forvalues k = 1/3 {
	gen income_hh_before`k'_mom = .
}
forvalues k = 0/3 {
	gen income_hh_year`k'_mom = .
}

local n_obs = _N
forvalues row = 1/`n_obs' {

	* Year of birth of the child in this row
	local birth_yr = dob_year[`row']

	* Row of matrix cpi paired with the survey of the year of birth;
	* values are rescaled by cpi2019 / cpi[row, 2]
	local cpi_row = `birth_yr' - 1978

	* capture: there is no survey in odd years after 1994 (or before 1979)
	forvalues k = 1/3 {
		local svy_yr = `birth_yr' - `k' + 1
		local svy_row = `cpi_row' - `k' + 1
		cap replace income_hh_before`k'_mom = tnfi_trunc_`svy_yr'*(`cpi2019'/cpi[`svy_row',2]) in `row'
	}

	forvalues k = 0/3 {
		local svy_yr = `birth_yr' + `k' + 1
		local svy_row = `cpi_row' + `k' + 1
		cap replace income_hh_year`k'_mom = tnfi_trunc_`svy_yr'*(`cpi2019'/cpi[`svy_row',2]) in `row'
	}

}

* Negative results are set to missing
foreach v in before1 before2 before3 year0 year1 year2 year3 {
	replace income_hh_`v'_mom = . if income_hh_`v'_mom <0
}

drop tnfi_trunc_????


* Mother's HH members in the child's year of birth
********************************************************************************
* famsize is famsize_YYYY of the year of birth and falls back to the year AFTER
* birth when the year of birth has no valid (non-negative, non-missing) value.
*
* Fix relative to the earlier version of this section: a negative code in the
* year after birth could wipe out a valid value already taken from the year of
* birth; the fallback now only fills rows that are still missing.

gen famsize = .

local N = _N
forvalues i=1(1)`N' { 

	* Each child DOB information
	local year_birth = dob_year[`i']
	local year_next = `year_birth' + 1

	* Year of birth
	* capture: there is no survey in odd years after 1994
	cap replace famsize = famsize_`year_birth' ///
		if famsize_`year_birth'>=0 & !missing(famsize_`year_birth') in `i'

	* Year after birth, used only when the year of birth gave no valid value
	cap replace famsize = famsize_`year_next' ///
		if missing(famsize) & famsize_`year_next'>=0 & !missing(famsize_`year_next') in `i'

} 

gen lnfamsize =  ln(famsize)

drop famsize_????




********************************************************************************
** MOTHER NON COGNITIVE MEASURE 
********************************************************************************
 
** Attach the mother's Rotter and Rosenberg items
merge m:1 mom_pid using "$rawpath\000_NLSY79_noncognitive_mother.dta", nogen ///
keep(master matched)

* Negative codes are treated as missing
forvalues k = 1/9 {
	replace rosenberg_esteem_00000`k'_1980 =. if rosenberg_esteem_00000`k'_1980<0
}
replace rosenberg_esteem_000010_1980 =. if rosenberg_esteem_000010_1980<0

forvalues k = 1/4 {
	replace rotter_`k'a_1979 = . if rotter_`k'a_1979<0
}

** See NLSY codebook for interpretation of each variable
** Not using the self-esteem and control index available from NLSY since not sure how they are constructed


** On Control 

* 1) ROTTER: DEGREE OF CONTROL R HAS OVER DIRECTION OF OWN LIFE 
*ANSWER: 1: In control 2: Not in control
* 2) ROTTER: IMPORTANCE OF PLANNING 
*ANSWER: 1: R's plans work 2: Matter of luck
* 3) ROTTER: IMPORTANCE OF LUCK
*ANSWER: 1: LUCK NOT FACTOR 2: FLIP A COIN
* 4) ROTTER: DEGREE OF INFLUENCE R HAS OVER OWN LIFE
*ANSWER: 1:   1 LUCK BIG ROLE  2 LUCK NO ROLE

* Change such that higher score means in control
forvalues k = 1/3 {
	replace rotter_`k'a_1979 = 0 if rotter_`k'a_1979==2
}
* The answers are swapped in item 4, so it is shifted from 1/2 to 0/1 instead.
* The full variable name is spelled out: the previous "rotter_4a_197" only
* resolved through variable abbreviation.
replace rotter_4a_1979 = rotter_4a_1979 - 1


** Mother's control by factor analysis
factor rotter_1a_1979 rotter_2a_1979 rotter_3a_1979 rotter_4a_1979, ipf factors(1) 
rotate
predict control_mom



** Self-esteem answers 1: strongly agree to 4 strongly disagree

*1) ROSENBERG SELF-ESTEEM 80 INT - I AM A PERSON OF WORTH
*2) ROSENBERG SELF-ESTEEM 80 INT - I HAVE A NUMBER OF GOOD QUALITIES
*3) ROSENBERG SELF-ESTEEM 80 INT - I AM INCLINED TO FEEL THAT I AM A FAILURE
*4) ROSENBERG SELF-ESTEEM 80 INT - I AM AS CAPABLE AS OTHERS
*5) ROSENBERG SELF-ESTEEM 80 INT - I FEEL I DO NOT HAVE MUCH TO BE PROUD OF
*6) ROSENBERG SELF-ESTEEM 80 INT - I HAVE A POSITIVE ATTITUDE
*7) ROSENBERG SELF-ESTEEM 80 INT - I AM SATISFIED WITH MYSELF
*8) ROSENBERG SELF-ESTEEM 80 INT - I WISH I HAD MORE SELF-RESPECT
*9) ROSENBERG SELF-ESTEEM 80 INT - I FEEL USELESS AT TIMES
*10) ROSENBERG SELF-ESTEEM 80 INT - I SOMETIMES THINK I AM "NO GOOD" AT ALL

* Reverse the positively worded items (1, 2, 4, 6, 7) so that a higher value
* means higher self-esteem on all ten items
foreach k in 01 02 04 06 07 {
	replace rosenberg_esteem_0000`k'_1980 = 5-rosenberg_esteem_0000`k'_1980
}


** Mother's self-esteem by factor analysis
factor ///
rosenberg_esteem_000001_1980 rosenberg_esteem_000002_1980 rosenberg_esteem_000003_1980 ///
rosenberg_esteem_000004_1980 rosenberg_esteem_000005_1980 rosenberg_esteem_000006_1980 ///
rosenberg_esteem_000007_1980 rosenberg_esteem_000008_1980 rosenberg_esteem_000009_1980 ///
rosenberg_esteem_000010_1980, ipf factors(1) 
rotate
predict seft_esteem_mom


corr seft_esteem_mom control_mom


** Mother's non cognitive by factor analysis
factor control_mom seft_esteem_mom, ipf factors(1) 
rotate
predict noncog_mom


drop rotter_score_1979 rosenberg_esteem_0000??_1987 rosenberg_esteem_score_1987 rosenberg_esteem_score_1980
* Range drop: relies on the variable order of the raw file
drop rotter_1a_1979  - rosenberg_esteem_000010_1980


********************************************************************************
** MATERNAL WEEKLY EMPLOYMENT
********************************************************************************

* Weekly hours worked; the merge indicator is kept under the name merge_workmother
* because it marks the start of the variable range dropped after the weekly loop
merge m:1 mom_pid using "$rawpath\000_NLSY79_week_hourswork.dta", keep(master matched) generate(merge_workmother)

* Weekly work status; no merge indicator is kept
merge m:1 mom_pid using "$rawpath\000_NLSY79_week_workstatus.dta", keep(master matched) nogenerate

** Some years have 53 weeks, we will delete them
drop status_work_????_53 hrs_worked_????_53


** Create maternal employment with respect to the child date of birth
** DO NOT ORDER VARIABLES, ORDER OF VARIABLES IN WEEK AND YEAR MATTERS!
********************************************************************************

* Note: We don't know the day of birth, only the month, so the 1st day of the month is used, which will generate some error

* Turn every weekly work status variable into a dummy: 1 when the code is 7 or
* higher, 0 otherwise; missing values stay missing
foreach v of varlist status_work_* {
	qui replace `v' = (`v'>=7) if `v'!=.
}

* Date of birth (first day of the month of birth)

* Discard the first three months following birth. For instance, child born in July discard august, september, october and start counting from november on
qui gen date_birth = mdy(dob_month, 1, dob_year)

*qui gen date_week_birth = wofd(date_birth)

* Week and year in which the first year of life starts being counted
qui gen week_start = week(date_birth + 30.5*4)
qui gen year_start = year(date_birth + 30.5*4)

* Week and year seven days before that start date; one, two or three years are
* added to year_end to close each 12-month window
qui gen week_end = week(date_birth + 30.5*4 -7)
qui gen year_end = year(date_birth + 30.5*4 -7)


drop if date_birth==.

* Hours worked in the first, second and third year of life
forvalues k = 1/3 {
	qui gen hrs_worked_`k'year = 0
}

* Weeks worked in the first, second and third year of life
forvalues k = 1/3 {
	qui gen weeks_worked_`k'year = 0
}

** Hours worked year before birth
qui gen hrs_worked_yearbefore = .

* Number of weekly variables entering each window (to adjust years with 53 weeks)
forvalues k = 1/3 {
	qui gen n_weeks_`k'year =  .
}

* Row-by-row totals of the weekly variables. Each window is selected with a
* variable RANGE (first - last), so the result depends on the weekly variables
* being stored in chronological order.
local N = _N
forvalues i = 1/`N' {

	* Window limits for this child
	local wk_first = week_start[`i']
	local yr_first = year_start[`i']

	local wk_last = week_end[`i']
	local yr_last = year_end[`i']

	local yr_before = dob_year[`i'] - 1


	*********************************
	** Hours worked year before birth: sum of every weekly variable of that calendar year

	qui desc hrs_worked_`yr_before'*, varlist
	local total = usubinstr("`r(varlist)'", " ", " + ", .)
	qui replace hrs_worked_yearbefore = `total' in `i'

	*********************************
	* 1st, 2nd and 3rd year of life: the k-th window runs from week wk_first of
	* year yr_first + (k - 1) to week wk_last of year yr_last + k
	forvalues k = 1/3 {

		local from_yr = `yr_first' + `k' - 1
		local to_yr   = `yr_last' + `k'

		* Hours worked: sum over the weekly variables in the window
		qui desc hrs_worked_`from_yr'_`wk_first' - hrs_worked_`to_yr'_`wk_last', varlist
		local n_vars = wordcount("`r(varlist)'")
		local total = usubinstr("`r(varlist)'", " ", " + ", .)
		qui replace hrs_worked_`k'year = `total' in `i'

		* Number of weekly variables in the window (to adjust years with 53 weeks)
		qui replace n_weeks_`k'year = `n_vars' in `i'

		* Weeks worked: sum of the weekly work dummies in the window
		qui desc status_work_`from_yr'_`wk_first' - status_work_`to_yr'_`wk_last', varlist
		local total = usubinstr("`r(varlist)'", " ", " + ", .)
		qui replace weeks_worked_`k'year = `total' in `i'
	}

} /*end loop individuals*/

drop status_work_*	hrs_worked_????_? hrs_worked_????_??

* Range drop: everything stored from the first merge indicator through year_end
drop merge_workmother - year_end


 
********************************************************************************
** CHILD COGNITIVE OUTCOMES 
********************************************************************************

** Open variables achievement outcomes children, keep relevant variables
merge 1:1 child_pid using "$rawpath\000_cnlsy_child_cognitiveoutcomes_raw.dta", nogen ///
keep(matched) 


* Clean missing: negative codes in the age variables are set to missing
foreach var of varlist csage????_???? {
	qui replace `var'=. if `var'<0
}


* Clean missing: negative codes in the test variables are set to missing
* (variable range: relies on the order of the raw file)
foreach var of varlist math1986_1986 - ppvtz2014_2014 {
	qui replace `var'=. if `var'<0
}


* First test taken
********************************************************************************
* For each test: first_year_<test> is the first even year 1986-2014 with a
* positive, non-missing <test>zYYYY_YYYY; <test> is that value; and age_<test>
* is csageYYYY_YYYY of that same year.
*
* Fix relative to the earlier version of this section: the age was filled with
* an "age still missing" condition, so when csage was missing in the year of the
* first test the age was silently taken from a later test year and no longer
* matched the score and first_year_<test>. Age and score are now both tied to
* first_year_<test>; the age stays missing when it is missing in that year.
foreach test in ppvt recog math {

	qui gen first_year_`test' = .
	qui gen age_`test' = .
	qui gen `test' = .
	forvalues year=1986(2)2014 {

		qui replace first_year_`test' = `year' if first_year_`test' == . & (`test'z`year'_`year' != . & `test'z`year'_`year' > 0)

		* Only rows whose first test year is this year are touched
		qui replace age_`test' = csage`year'_`year' if first_year_`test' == `year'

		qui replace `test' = `test'z`year'_`year' if first_year_`test' == `year'

	}
}

drop math1986_1986 - ppvtz2014_2014


drop csage1986_1986 csage1988_1988 csage1990_1990 - csage2006_2006 csage2008_2008 - csage2014_2014


********************************************************************************
*CHILD RESIDENTIAL STATUS*******************************************************
********************************************************************************

merge m:1 mom_pid using "$rawpath\000_NLSY79_childresidencestatus.dta", ///
 keep(master matched) nogen

* Give this variable the cNresYY_YYYY name that the lookup below expects
rename c4dob82_1982 c4res82_1982 

* Residence status in the year of birth (0) and the three following years
forvalues k = 0/3 {
	gen residence_child_`k'year = .
}


local N = _N
forvalues i=1(1)`N' { 

	* Each child DOB information
	local year_birth = dob_year[`i']

	* Order of child: selects the cN block of residence variables
	local o = birth_order[`i']

	* capture: there is no survey in odd years after 1994
	forvalues k = 0/3 {
		local yr = `year_birth' + `k'
		* Two-digit year used in the raw variable name (cNresYY_YYYY)
		local yy = substr("`yr'",3,2)
		cap replace residence_child_`k'year = c`o'res`yy'_`yr' in `i'
	}

} 


** A lot of missing values, so a single variable is defined over the first years of life, prioritizing the older age
* Negative values are skipped here, as negative codes are treated as missing in
* the rest of this do file. Without this screen a negative code at an older age
* takes priority over a valid answer at a younger age, and the child is then
* removed by the final-sample rule that drops residence_child different from 1.
* NOTE(review): confirm that the raw residence file uses negative missing codes
gen residence_child = .

foreach k in 3 2 1 0 {
	replace residence_child = residence_child_`k'year ///
	if residence_child==. & residence_child_`k'year!=. & residence_child_`k'year>=0
	
}

label define residence_child ///
1 "IN R'S HOUSEHOLD" ///
2 "WITH OTHER PARENT" ///
3 "WITH OTHER RELATIVES" ///
4 "IN FOSTER CARE" ///
5 "WITH ADOPTIVE PARENTS" ///
6 "LONG TERM CARE INSTITUTION" ///
7 "AWAY AT SCHOOL" ///
8 "DECEASED" ///
9 "LIVES PART-TIME WITH BOTH PARENTS" ///
10 "LIVES PART-TIME WITH R AND OTHER PERSON" ///
11 "OTHER" ///
99 "CHILD DELETED"
label values residence_child residence_child

drop c?res??_???? c??res??_????
* Range drop: relies on the variable order of the raw file
drop c7rex90_1990 - c11res_dli_xrnd


********************************************************************************
** FINAL VARIABLES
********************************************************************************

* Mother's average hours and weeks worked over the first three years of life
* (missing when any of the three years is missing)
foreach what in hrs weeks {
	gen double avg_`what'_worked_3years = (`what'_worked_1year + `what'_worked_2year + `what'_worked_3year)/3
}


* Mother's average hourly wage over year1-year3 (row mean of the non-missing values)
egen double avg_wage_3years_mom =  rmean(wage_hour_year1_mom wage_hour_year2_mom wage_hour_year3_mom )


* Arbitrary correction: hourly wages above 90 are set to missing
foreach v of varlist avg_wage_3years_mom wage_hour_beforebirth_mom {
	replace `v' = . if `v'>90
}


* Average HH income over year1-year3 and over the two "before" surveys, plus logs
egen double avg_income_hh_3years_mom = rmean(income_hh_year1_mom income_hh_year2_mom income_hh_year3_mom)
gen double lnavg_income_hh_3years_mom = ln(avg_income_hh_3years_mom)

egen double avg_income_hh_2yearsbefore_mom = rmean(income_hh_before1_mom income_hh_before2_mom)
gen double lnavg_income_hh_2yearsbefore_mom = ln(avg_income_hh_2yearsbefore_mom)


** Husband present x characteristics

*gen spouse_inc_atbirth_mom = spouse_atbirth*inc_spouse_mom
*replace spouse_inc_atbirth_mom = 0 if spouse_atbirth==0

* Spouse's grade when a spouse is present, 0 when no spouse is present
gen spouse_educ_atbirth = spouse_atbirth*hgc_birth_spouse_mom
replace spouse_educ_atbirth = 0 if spouse_atbirth==0

* Dummy regions: codes 1 to 4 of region_mom, in this order
local code = 0
foreach reg in north northcentral south west {
	local ++code
	gen `reg' = (region_mom==`code')
}


** Missing AFQT
gen missing_afqt = (afqt_mom==.)
*replace afqt_mom = 0 if missing_afqt==1


** Dummy worked before birth (missing when hours in the year before birth are missing)
gen double work_before_birth = (hrs_worked_yearbefore>0) if hrs_worked_yearbefore!=.


** Dummies year of birth
forvalues y = 1979/2014 {
	gen d_birth_`y' = (dob_year==`y')
}


** Mother has some college (more than 12 grades), at birth and ever
gen higheduc_mom = (hgc_birth_mom>12) if hgc_birth_mom!=.

gen higheduc_ever_mom = (hgc_ever_mom>12) if hgc_ever_mom!=.




********************************************************************************
** DROPPING ON CONDITIONS

** Inconsistent working hours per week
* Hours per week worked in each of the three years of life
forvalues k = 1/3 {
	gen hours_per_week_`k'year = hrs_worked_`k'year/weeks_worked_`k'year
}

* Flag years with more than 100 hours per worked week
forvalues k = 1/3 {
	gen above100hrs_week`k'year = (hours_per_week_`k'year>100 & hours_per_week_`k'year!=.)
}

* Flag children with at least one such year
gen above100hrs_week = (above100hrs_week1year==1 | above100hrs_week2year==1 | above100hrs_week3year==1)
*drop if above100hrs_week==1

* Average hours per worked week over the three years; 0 when no hours were worked
gen avg_hrs_worked_perweek = avg_hrs_worked_3years/avg_weeks_worked_3years
replace avg_hrs_worked_perweek=0 if avg_hrs_worked_3years==0


********************************************************************************
** Final Sample

** Inconsistent hours
drop if above100hrs_week==1

** Children not living with mother (children with missing residence are kept)
drop if residence_child!=1 & residence_child!=.


** Keep only complete observations, no missing
* The regression is run only to obtain e(sample), i.e. the rows with no missing
* value in any of the listed variables

qui regress recog math ///
first_year_recog age_recog ///
first_year_math age_math ///
avg_hrs_worked_3years avg_weeks_worked_3years ///
male birth_order hispanic black ///
mother_age_birth afqt_mom hgc_birth_mom  ///
spouse_atbirth spouse_educ_atbirth lnfamsize ///
north south west d_birth_????

gen nomissing_cog = (e(sample) == 1)



********************************************************************************
** Dummies for year took the first test

********************************************************************************
** Cognitive by factor analysis
* One factor extracted from the math and reading recognition scores on the complete sample
factor math recog if nomissing_cog==1, ipf factors(1) 
rotate
predict cog

** Age at cognitive test, based on math test (arbitrary decision)
gen age_cog = age_math


** Dummies for the year of the first test, based on math test (arbitrary decision)
* Built directly from first_year_math; no intermediate per-test dummies are kept
forvalues y = 1986(2)2014 {
	gen d_`y'_cog = (first_year_math==`y')
}


compress

********************************************************************************
** Label Key Variables

* Test scores
label variable ppvt "PPVT Score"
label variable math "PIAT Math Score"
label variable recog "PIAT Reading Score"
label variable cog "Cognitive Skills: PIAT math and reading recognition"

* Maternal employment
label variable avg_hrs_worked_3years "Average hours worked per year in first 3 years of child's life"
label variable hrs_worked_1year "Hours worked per year in first year of child's life"
label variable hrs_worked_2year "Hours worked per year in second year of child's life"
label variable hrs_worked_3year "Hours worked per year in third year of child's life"
label variable avg_weeks_worked_3years "Average weeks worked per year in first 3 years of child's life"

* Child and mother controls
label variable male "Sex of child (male=1, female=0)"
label variable birth_order "Birth order of child"
label variable hispanic "Child is hispanic"
label variable black "Child is black"
label variable mother_age_birth "Mother's age at birth of child"
label variable afqt_mom "Mother's AFQT score"
label variable hgc_birth_mom "Mother's highest grade completed at birth"
label variable spouse_atbirth "Mother's spouse present at birth"
label variable spouse_educ_atbirth "Spouse present at birth x spouse's highest grade completed"
label variable lnavg_income_hh_3years_mom "Log average family income in the three years following year of birth"
label variable lnfamsize "Log family size at birth"
label variable north "Live in north region at birth"
label variable northcentral  "Live in north central region at birth"
label variable south "Live in south region at birth"
label variable west  "Live in west region at birth"

label variable avg_income_hh_3years_mom "Average family income in the three years following year of birth"
label variable avg_wage_3years_mom "Average mother's wage in the three years following year of birth"

label variable work_before_birth "Mother worked year before birth"

label variable hgc_ever_mom  "Mother's highest grade ever completed"
label variable higheduc_mom "Mother completed some college education or more by birth"
label variable higheduc_ever_mom "Mother completed some college education or more"

* Year-of-birth dummies (capture kept in case a dummy is absent)
forvalues y = 1979/2014 {
	capture label variable d_birth_`y' "Dummy year of birth `y'"
}

* Age and first-test-year dummies of each outcome in yvars
local yvars "cog"
foreach outcome of local yvars {

	label variable age_`outcome' "Age in month at test"

	forvalues year = 1986(2)2014 {
		label variable d_`year'_`outcome' "Dummy year of first test"
	}

}

** Final variables

* Short names for the key variables
rename avg_hrs_worked_3years hours
rename avg_weeks_worked_3years weeks
rename lnavg_income_hh_3years_mom income

* Indicators for average hours worked over the first three years.
* Missing hours are larger than any number in Stata, so the ">" and ">="
* comparisons are guarded with !missing(): a child with missing hours gets 0
* instead of 1, as the guarded education and age indicators below already do.
gen indd = (hours > 2080 & !missing(hours))

gen top_bunch_hours = (hours == 2080)

gen upper_hours = (hours >= 2080 & !missing(hours))

gen full_hours = (hours >= 1500 & !missing(hours))

label var indd "Average hours worked 3 years > 2080"
label var top_bunch_hours "Average hours worked 3 years = 2080"
label var upper_hours "Average hours worked 3 years >= 2080"
label var full_hours "Average hours worked 3 years >= 1500"


** Mother's education
* Categories of highest grade completed at birth; every indicator is 0 when the
* grade is missing

gen hs_mom = (hgc_birth_mom == 12)

gen hs_ls_mom = (hgc_birth_mom < 12)

gen col_sm_mom = (hgc_birth_mom > 12 & hgc_birth_mom < 16)

gen col_mom = (hgc_birth_mom == 16)

gen col_pl_mom = (hgc_birth_mom > 16 & hgc_birth_mom != .)

gen high_educ = (hgc_birth_mom >= 16 & hgc_birth_mom != .)



** New mother age variables
* Age bands of the mother at the birth of the child; every indicator is 0 when
* the age is missing

gen mother_teen = (mother_age_birth < 20)

gen mother_2024 = (mother_age_birth >= 20 & mother_age_birth <= 24)

gen mother_2529 = (mother_age_birth >= 25 & mother_age_birth <= 29)

gen mother_3034 = (mother_age_birth >= 30 & mother_age_birth <= 34)

gen mother_old = (mother_age_birth > 34 & mother_age_birth != .)


label var hs_ls_mom "Mother's education less than high school" 
label var hs_mom "Mother's education completed high school" 
label var col_sm_mom "Mother's education some college" 
label var col_mom "Mother's education completed college" 
label var col_pl_mom "Mother's education more than college" 

label var mother_teen "Mother's age less than 20 years old"
label var mother_2024 "Mother's age 20 to 24 years old"
label var mother_2529 "Mother's age 25 to 29 years old"
label var mother_3034 "Mother's age 30 to 34 years old"
label var mother_old  "Mother's age 35 years old or more"

********************************************************************************
** Save Cognitive sample
* Restrict to the complete-case sample flagged by nomissing_cog
keep if nomissing_cog==1

* Mean and standard deviation of AFQT within the kept sample, stored in
* temporary scalars so they do not depend on r() staying untouched
summarize afqt_mom
tempname afqt_mean afqt_sd
scalar `afqt_mean' = r(mean)
scalar `afqt_sd' = r(sd)

* Keep the original score, then standardize afqt_mom and add its square
gen double afqt_mom_orig = afqt_mom
replace afqt_mom = (afqt_mom - `afqt_mean')/`afqt_sd'

gen double afqt_mom2 = afqt_mom^2

label variable afqt_mom_orig "Mother's AFQT original NLSY"
label variable afqt_mom2 "Mother's AFQT squared"

save "$datapath\001_nlsy_child_mother_cog.dta", replace


